*** This do file creates the summary statistics in Tables 4, A2, A3, and A4 using CPS data and Original Policy Categories



* ---------------------------------------------------------------------------
* Session setup: reset Stata, define the project directories, open the log.
* ---------------------------------------------------------------------------
clear all
set more off
capture log close

* If needed change global path to point to directory where files are stored on your computer
*global path "I:/DataSets5/Duncan/Dropbox/Recent Minimum Wage Changes/2020.12 NBER Update/JOLE Precommittment Replication"

* Stop early with a clear message when the project root has not been defined;
* otherwise every directory below would silently resolve relative to the file-system root.
if `"$path"' == "" {
	display as error "global macro path is not set; define it (see the commented line above) before running this do-file"
	exit 198
}

* Sub-directories of the project root used for inputs and outputs
global dtadir "$path/Data"
global tabdir "$path/Tables"
global figdir "$path/Figures"
global estdir "$path/Estimates"
global logdir "$path/Logfiles"

* Single forward slash between directory and file name keeps the path portable
log using "$logdir/SummaryStatsStandard_CPS_CEPCategories.log", replace

*** Assemble relevant years of the CPS
* The data directory global and forward slashes are used so the path works on
* Windows, macOS and Linux alike (backslashes only work on Windows).
use "$dtadir/CPS-2019.dta", clear

*** Restrict the sample to ages 16-64 (drops seniors and those under 16).
*** Observations with a missing age are dropped too, because missing compares
*** as larger than any number in Stata.
drop if age >= 65 | age < 16
*** Drop records with empstat code 0 (presumably "not in universe" - confirm against the CPS codebook)
drop if empstat == 0

*** Construct economic outcomes of interest 

* if empstat = 10: "At work"
* if empstat = 12: "employed, not at work last week"
* empstat = 1 (armed forces) is also counted as employed by assumption
gen employed = inlist(empstat, 1, 10, 12)

**** Construct education variables
* Mutually exclusive 0/1 indicators based on the educ code.
gen dropout     = educ < 73
gen highschool  = educ == 73
gen somecollege = educ >= 81 & educ <= 92
* Missing compares as larger than any number, so exclude it explicitly;
* otherwise a missing educ would be classified as college-plus.
gen collegeplus = educ >= 111 & !missing(educ)

**** Construct full vs part-time variable
* wkstat codes listed here are the ones treated as part-time
gen parttime = inlist(wkstat, 12, 20, 21, 22, 40, 41)

***
* Assign the policy categories
***
* Each state ends up in at most one of: indexer, StatIncreaserLarge,
* StatIncreaserSmall. States with none of the three flags are the
* "no change" group.

* State-level minimum wage variables (one row per statefip)
merge m:1 statefip using "$dtadir/min_wage_variables_for_ACS_and_CPS_analysis.dta"
drop _merge 

gen indexer = 0
replace indexer = 1 if originaltype == "Indexer"

* Large statutory increasers: non-indexers whose minimum wage rose by at least
* one dollar between January 2013 and January 2015.
* Missing values compare as larger than any number in Stata, so every variable
* entering the comparison is required to be non-missing.
* NOTE(review): the one-dollar threshold uses the 2013-2015 change while the
* "any increase" flag below uses 2013-2016 - confirm this is intended.
gen StatIncreaserLarge = 0
replace StatIncreaserLarge = 1 if indexer == 0 & (jan2015min - jan2013min) >= 1 & !missing(jan2013min, jan2015min, jan2016min)

* Any statutory increase between January 2013 and January 2016 among non-indexers.
* Without the missing-value guard, observations lacking minimum wage data
* (e.g. unmatched in the merge) would satisfy "> 0" and be flagged as increasers.
gen statutoryincreasein2014or2015 = 0
replace statutoryincreasein2014or2015 = 1 if (jan2016min - jan2013min) > 0 & !missing(jan2013min, jan2016min) & indexer == 0

* Small statutory increasers: increased, but not by enough to count as large
gen StatIncreaserSmall = 0
replace StatIncreaserSmall = 1 if indexer == 0 & statutoryincreasein2014or2015 == 1 & StatIncreaserLarge == 0

* Defensive: keep the categories mutually exclusive
replace indexer = 0 if StatIncreaserSmall == 1 | StatIncreaserLarge == 1


*** Generate hours worked variable. Use hours worked in the previous week.
*** If people are NIU (ahrsworkt == 999) recode hours worked as 0.
*** If people are in the armed forces assume they work 40 hours per week.
*** Topcode hours worked at 100 per week
gen workhrs = ahrsworkt
replace workhrs = 0 if ahrsworkt == 999
replace workhrs = 40 if ahrsworkt == 999 & empstat == 1
* Exclude missing explicitly: missing compares as larger than 100 and would
* otherwise be recoded to 100 hours.
replace workhrs = 100 if workhrs > 100 & !missing(workhrs)

** Mid-skill group: high school graduates aged 22-30 and dropouts aged 31-64
gen group = 0
replace group = 1 if (age <= 30 & age > 21 & highschool == 1) | (age > 30 & age < 65 & dropout == 1)

** Mid-skill employment rate by state and month
* stateempD is defined only on mid-skill rows; stateempE spreads that value
* to every row of the same state-month cell.
egen stateempD = mean(employed) if group == 1, by(month year statefip)
egen stateempE = max(stateempD), by(month year statefip)

** Calendar quarter from month (1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4)
gen quarter = ceil(month/3) if inlist(month, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)

** Merge state-by-quarter house price index; discard rows that exist only in the HPI file
merge m:1 statefip year quarter using "$dtadir/HPI_2019.dta"
drop if _merge == 2
drop _merge

* Final person weight is referred to as cmpwgt in the rest of the file
rename wtfinl cmpwgt

** Merge state-by-quarter personal income and rescale it to thousands
merge m:1 statefip year quarter using "$dtadir/PersonalIncome_2019.dta"
replace PersonalIncome = PersonalIncome/1000
drop if _merge == 2
drop _merge

**
* CHECK STATE POLICY CATEGORIES 
**
* Tabulate the states in each category so the assignment can be checked in the log
tab statefip if StatIncreaserLarge == 1
tab statefip if StatIncreaserSmall == 1
tab statefip if indexer == 1


* Indicator for race code 200
gen black = 0 
replace black = 1 if race == 200


* Generate January minimum wage variable
* For each survey year, take the January minimum wage of that same year
gen EffectiveMinimumWage =.
forvalues i=2011/2019 {
	replace EffectiveMinimumWage = jan`i'min if year == `i'
}

* Drop unneeded variables and compress data to speed estimation
* Variables are dropped one at a time: a single "capture drop" over the whole
* list drops nothing at all if any one of the variables is absent.
local dropvars serial hwtfinl hwtsupp cpsid asecflag pernum cpsidp wtsupp originaltype jan2011min jan2012min jan2013min jan2014min statincreasebyjan2014 jan2015min statincreasebyjan2015 jan2016min ///
	statincreasebyjan2016 jan2017min jan2018min change2014 change2015 change2016 change2017 change2018 changesby2014 changesby2015 changesby2016 changesby2017 ///
	changesby2018
foreach v of local dropvars {
	capture drop `v'
}

* Subgroup-specific copies of the outcomes: each is missing outside its
* subgroup, so a weighted mean of the variable is the subgroup mean.
gen HPI_young = HPI if age <= 21
gen PersonalIncome_young = PersonalIncome if age <= 21
gen employed_young = employed if age <= 21
gen employed_lowskill = employed if dropout == 1 & age <= 25
gen employed_primeage = employed if age >= 26 & age <= 54
gen employed_midskill = employed if group == 1

compress

* Single policy-category code:
*   1 = no change, 2 = indexer, 3 = small statutory increaser, 4 = large statutory increaser
gen type = .
replace type = 1 if indexer == 0 &  StatIncreaserSmall == 0 & StatIncreaserLarge == 0
replace type = 2 if indexer == 1
replace type = 3 if StatIncreaserSmall == 1
replace type = 4 if StatIncreaserLarge == 1

* Labels used as row names in the exported summary tables
label var black "Black"
label var HPI "House Price Index (1000s)"
label var PersonalIncome "Income Per Capita (1000s)"
label var employed "Employment"
label var highschool "High School Degree"
label var somecollege "Some College Education"
label var EffectiveMinimumWage "Effective Minimum Wage"
label var age "Age"

*** Table A4: Unadjusted Differences Using CPS Data and 1-Dollar Cutoff and 2019 Post Period
* Builds a table of weighted means by policy category for 2011-2013 and 2019,
* the change between them, and that change relative to the no-change states,
* then exports it to Excel. The data in memory are restored afterwards.
preserve

* post = 0 for the 2011-2013 base period, 1 for 2019, missing otherwise
gen post = 0 if year >= 2011 & year <= 2013
replace post = 1 if year == 2019

* Weighted means of each outcome by policy category and period
collapse employed_lowskill employed_young employed_primeage employed_midskill HPI_young PersonalIncome_young [aw=cmpwgt], by(type post)

* Round averages
foreach var of varlist employed_lowskill employed_young employed_primeage employed_midskill {
	replace `var' = round(`var', .001)
}

replace HPI_young = round(HPI_young, .1)
replace PersonalIncome_young = round(PersonalIncome_young, .01)

* Give every outcome the common stub "v" so the data can be reshaped:
* long over outcomes, then wide over period (v0 = base period, v1 = post period)
rename *_* v_*_*

drop if post ==.
reshape long v,  i(type post) j(var, string)
reshape wide v, i(type var) j(post)

rename v0 avg_2011_2013
rename v1 avg_2019

* Change over time, and that change net of the change in no-change states (type 1)
gen pre_post_diff = avg_2019 - avg_2011_2013
gen pre_post_diff_nc = pre_post_diff if type == 1
bysort var: egen pre_post_diff_nochange = max(pre_post_diff_nc)

gen change_nc = pre_post_diff - pre_post_diff_nochange

drop pre_post_diff_nc pre_post_diff_nochange

* The comparison group has no change relative to itself
replace change_nc = . if type == 1

* Shorten the names of the control variables
replace var = "_HPI" if var == "_HPI_young"
replace var = "_PersonalIncome" if var == "_PersonalIncome_young"

* variable to order results
gen varnum = 1
replace varnum = 2 if var == "_employed_young"
replace varnum = 3 if var == "_employed_primeage"
replace varnum = 4 if var == "_employed_midskill"
replace varnum = 5 if var == "_HPI"
replace varnum = 6 if var == "_PersonalIncome"

sort varnum type

gen typestr = ""

* The dollar sign is escaped with a backslash so Stata writes it literally
* instead of treating it as the start of a global macro reference.
replace typestr = "No Change" if type == 1
replace typestr = "Indexers" if type == 2
replace typestr = "Increasers < \$1" if type == 3
replace typestr = "Increasers >= \$1" if type == 4

order var typestr, first

drop type varnum

* Variable labels become the column headers of the exported sheet
label var var "Variable"
label var typestr "Policy Category"
label var avg_2011_2013 "2011-2013"
label var avg_2019 "2019"
label var pre_post_diff "Change"
label var change_nc "Change Relative to Non-Increasers"

export excel using "$tabdir/unadjusted_differences_cps_cep_2019.xlsx", firstrow(varlabels) keepcellfmt replace

restore


*** Table A3: Sample Summary Statistics: CPS and Supplemental Data for 2011-2013 and 2019
* Weighted means and standard deviations by period for two samples
* (dropouts aged 25 or younger, and everyone aged 21 or younger), written to CSV.
* The data in memory are restored afterwards.
preserve

* Period indicator: 0 = 2011-2013, 1 = 2019, missing for every other year
gen post = cond(year == 2019, 1, cond(inrange(year, 2011, 2013), 0, .))

* Variables summarised in the table
local sumvars employed age black highschool somecollege HPI PersonalIncome EffectiveMinimumWage

* One pair of stored results (one per period) for each sample definition
foreach sample in "dropout == 1 & age <= 25" "age <= 21" {
	bysort post: eststo: estpost tabstat `sumvars' if `sample' [aw = cmpwgt], ///
		columns(statistics) statistics(mean sd)
}

esttab using "$tabdir/summary_stats_cps_2019.csv", replace main(mean) aux(sd) label nomtitle nolegend nonotes
estimates clear

restore

*** Table Unadjusted Differences Using CPS Data and 1-Dollar Cutoff and 2015-2019 Post Period
* Builds a table of weighted means by policy category for 2011-2013 and
* 2015-2019, the change between them, and that change relative to the
* no-change states, then exports it to Excel. The data in memory are restored afterwards.
preserve

* post = 0 for the 2011-2013 base period, 1 for 2015-2019, missing otherwise (2014)
gen post = 0 if year >= 2011 & year <= 2013
replace post = 1 if year >= 2015 & year <= 2019

* Weighted means of each outcome by policy category and period
collapse employed_lowskill employed_young employed_primeage employed_midskill HPI_young PersonalIncome_young [aw=cmpwgt], by(type post)

* Round averages
foreach var of varlist employed_lowskill employed_young employed_primeage employed_midskill {
	replace `var' = round(`var', .001)
}

replace HPI_young = round(HPI_young, .1)
replace PersonalIncome_young = round(PersonalIncome_young, .01)

* Give every outcome the common stub "v" so the data can be reshaped:
* long over outcomes, then wide over period (v0 = base period, v1 = post period)
rename *_* v_*_*

drop if post ==.
reshape long v,  i(type post) j(var, string)
reshape wide v, i(type var) j(post)

rename v0 avg_2011_2013
rename v1 avg_2015_2019

* Change over time, and that change net of the change in no-change states (type 1)
gen pre_post_diff = avg_2015_2019 - avg_2011_2013
gen pre_post_diff_nc = pre_post_diff if type == 1
bysort var: egen pre_post_diff_nochange = max(pre_post_diff_nc)

gen change_nc = pre_post_diff - pre_post_diff_nochange

drop pre_post_diff_nc pre_post_diff_nochange

* The comparison group has no change relative to itself
replace change_nc = . if type == 1

* Shorten the names of the control variables
replace var = "_HPI" if var == "_HPI_young"
replace var = "_PersonalIncome" if var == "_PersonalIncome_young"

* variable to order results
gen varnum = 1
replace varnum = 2 if var == "_employed_young"
replace varnum = 3 if var == "_employed_primeage"
replace varnum = 4 if var == "_employed_midskill"
replace varnum = 5 if var == "_HPI"
replace varnum = 6 if var == "_PersonalIncome"

sort varnum type

gen typestr = ""

* The dollar sign is escaped with a backslash so Stata writes it literally
* instead of treating it as the start of a global macro reference.
replace typestr = "No Change" if type == 1
replace typestr = "Indexers" if type == 2
replace typestr = "Increasers < \$1" if type == 3
replace typestr = "Increasers >= \$1" if type == 4

order var typestr, first

drop type varnum

* Variable labels become the column headers of the exported sheet
label var var "Variable"
label var typestr "Policy Category"
label var avg_2011_2013 "2011-2013"
label var avg_2015_2019 "2015-2019"
label var pre_post_diff "Change"
label var change_nc "Change Relative to Non-Increasers"

export excel using "$tabdir/unadjusted_differences_cps_cep_20152019.xlsx", firstrow(varlabels) keepcellfmt replace

restore

*** Table A2: Sample Summary Statistics: CPS and Supplemental Data for 2011-2013 and 2015-2019
* Weighted means and standard deviations by period for two samples
* (dropouts aged 25 or younger, and everyone aged 21 or younger), written to CSV.
* The data in memory are restored afterwards.
preserve

* Period indicator: 0 = 2011-2013, 1 = 2015-2019, missing for every other year
gen post = cond(inrange(year, 2015, 2019), 1, cond(inrange(year, 2011, 2013), 0, .))

* Variables summarised in the table
local sumvars employed age black highschool somecollege HPI PersonalIncome EffectiveMinimumWage

* One pair of stored results (one per period) for each sample definition
foreach sample in "dropout == 1 & age <= 25" "age <= 21" {
	bysort post: eststo: estpost tabstat `sumvars' if `sample' [aw = cmpwgt], ///
		columns(statistics) statistics(mean sd)
}

esttab using "$tabdir/summary_stats_cps_20152019.csv", replace main(mean) aux(sd) label nomtitle nolegend nonotes
estimates clear

restore

* Close the log opened at the top of the file and end the do-file
log close
exit, clear
